## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

Introduction

Objectives

In this notebook I am conducting an exploratory data analysis focused on the green space deprivation dataset released by Friends of the Earth (referred to as the FoE Green Space dataset). This is a dataset that accompanied the Friends of the Earth “England’s Green Space Gap Report,” published autumn 2020.

My objectives within this notebook are to:

  • Understand the scope and nature of the FoE Green Space dataset;

  • Generate and refine interesting questions that can be addressed using the FoE Green Space dataset (and other complementary datasets);

  • Develop ideas for analysis projects, which could the FoE Green Space dataset and generate outputs which could be used by policy-makers and local authorities with interests in green space and it’s accessibility.

The anticipated audience

  • The primary audience for the notebook is myself. As the understanding developed within it will underpin some future data analysis projects.

  • The notebook might be of interest to companies and individuals recruiting data scientists, and will form part of my portfolio of work.

Writing style

The status of the notebook

The datasets used

All data files are in the ‘Data’ folder (i.e. /foe_green_space/Data).

File Name Variable name in this notebook Notes More details

(FOE) Green Space Consolidated Data -

England - Version 2.1.xlsx

green_space Friends of the Earth

Local_Authority_District_to_Region

__December_2019__Lookup_in_England.csv

LAD_to_region Using the December 2019 version ons

Rural_Urban_Classification_(2001)_for_MSOAs

_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv

urban_rural_

classification

Using the 2001 version of the data (the most recently available) ons

A summary of findings / conclusions

Other notes

Context

National:

  • Funding challenges parks
  • Housing shortages
  • Leveling up agenda

The Friends of the Earth Report - “England’s Green Space Gap”

Questions for Exploratory Data Analysis

Where is green space deprivation experienced?

Who is living with green space deprivation?

The relationship between green space deprivation and health?

Ideas:

Read in and clean data

green_space <- read_excel("../Data/(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",
                          sheet = "MSOAs V2.1") %>%
  # inconsistent naming conventions for variables are used in the source data
  # some need to clean names for consitency
  clean_names()
## New names:
## * `` -> ...1
green_space %>% 
  select(la_code)
# FOE green space data doesn't include a variable for the region each MSOA/local authority lies within
# So, import a look up table from the ONS, which maps between Local Authority Districts and Regions
LAD_to_region <- read_csv("../Data/Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv") %>% 
  select(-FID, -LAD19NM)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   FID = col_double(),
##   LAD19CD = col_character(),
##   LAD19NM = col_character(),
##   RGN19CD = col_character(),
##   RGN19NM = col_character()
## )
LAD_to_region
# add region names and codes (from ONS look up) to FOE green space data
green_space <- green_space %>% 
  left_join(LAD_to_region, by = c("la_code" = "LAD19CD")) %>% 
  relocate(RGN19CD, RGN19NM, .before = area) %>% 
  rename(region_code = RGN19CD,
         region_name = RGN19NM)

green_space

The distribution of green space deprivation ratings across England

green_space %>% 
  tabyl(green_space_deprivation_rating)

https://themockup.blog/posts/2020-09-04-10-table-rules-in-r/ http://applied-r.com/r-color-tables/

to add?

colour_gs_row <- function(df, colour, rating){
  df %>% 
    tab_style(style = list(cell_fill(color = colour_scale[[colour]])),
                locations = cells_body(rows = green_space_deprivation_rating == rating)
                )
}

colour_gs_rows <-  function(df){
  
  df %>% 
    colour_gs_row(colour = 5, rating = "E") %>%
    colour_gs_row(colour = 4, rating = "D") %>%
    colour_gs_row(colour = 3, rating = "C") %>%
    colour_gs_row(colour = 2, rating = "B") %>%
    colour_gs_row(colour = 1, rating = "A")
}

# set green to grey colour scale for tables and graphs
colour_palette <- scales::seq_gradient_pal(low = "palegreen4", high = "grey")
colour_scale <- colour_palette(seq(0, 1, length.out = 5))

background_colour <- "#EDEDED"

# set labels for grouping the rows in the table below
d_e_group_label <- "\nUrgent action needed to improve access to green space"
a_b_c_group_label <-  "\nAction needed to protect access to green space"


# for each green space deprivation rate calculate the numbers and proportions of msoas
msoa_by_rating <- green_space %>% 
  tabyl(green_space_deprivation_rating) %>%
  mutate(percent = percent * 100) %>% 
  rename(n_msoa = n,
         percent_msoa = percent)

# for each green space deprivation rate calculate the numbers and proportions of the population
pop_by_rating <- green_space %>% 
  group_by(green_space_deprivation_rating) %>% 
  summarise(population = sum(population_imd)) %>% 
  
  ungroup() %>% 
  mutate(percent_pop = population / sum(population) * 100)
## `summarise()` ungrouping output (override with `.groups` argument)
# merge the msoa and population tables
msoa_and_pop_by_rating <- msoa_by_rating %>% 
  left_join(pop_by_rating, by = c("green_space_deprivation_rating" = "green_space_deprivation_rating"))


msoa_and_pop_by_rating %>%
  
  add_column(group = c(rep(a_b_c_group_label, 3), rep(d_e_group_label, 2))) %>% 
  arrange(desc(green_space_deprivation_rating)) %>% 


  gt(rowname_col = "green_space_deprivation_rating", groupname_col = "group") %>%
  
    # add spanners
    tab_spanner(label = "Neighbourhoods",
                columns = 2:3) %>% 
  
    tab_spanner(label = "Population",
                columns = 4:5) %>% 
  
    # format columns
    fmt_number(starts_with("percent_"), decimals = 0) %>%
    fmt_number(vars(population), scale_by = 1e-6) %>%
  
    # create and format summary rows for each group
    summary_rows(groups = TRUE,
                 columns = vars(population),
                 fns = list(Total = ~sum(.)),
                 formatter = fmt_number,
                 scale_by = 1e-6) %>%
  
    summary_rows(groups = TRUE,
                 columns = vars(n_msoa, percent_msoa, percent_pop),
                 fns = list(Total = ~sum(.)),
                 formatter = fmt_number,
                 decimals = 0
                 ) %>%

    
    # colour rows using the function defined above
    colour_gs_rows() %>% 

    # add table labels
    tab_stubhead(label = html("Green Space<br>Deprivation<br>Rating")) %>%  
    cols_label(n_msoa = "Number",
               percent_msoa = "%",
               population = "Millions of people",
               percent_pop = "%") %>% 
    tab_source_note(source_note = md("**Source:** Friends of the Earth")) %>% 
    tab_header(title = md("**Green Space Deprivation in England**"),
               subtitle = md("Understanding the scale of the problem")
               ) %>%

    # adjust the appearance of table elements
    tab_options(heading.align = "left",
                heading.subtitle.font.size = 12,
                table.border.top.color = "black",
                column_labels.border.bottom.color = "black",
                column_labels.border.bottom.width = 3,
                column_labels.background.color = background_colour, 
                summary_row.background.color = background_colour,
                row_group.background.color = "#FFFFFF",
                stub.background.color = background_colour, 
                source_notes.font.size = 10) %>%
    
    # adjust styling of table text
    tab_style(style = list(cell_text(style = "italic")),
          locations = list(cells_body(columns = c(3,5)),
                           cells_row_groups(),
                           cells_summary(columns = vars(percent_msoa, percent_pop)))
          ) %>% 
  
    tab_style(style = list(cell_text(weight = "bold")),
          locations = list(cells_summary(),
                           cells_column_labels(everything())
                           )
          ) %>%
  
    tab_style(style = list(cell_text(size = 12)),
              locations = list(cells_row_groups(),
                               cells_column_labels(everything()))
              ) %>% 
  
  
    tab_style(style = list(cell_text(weight = "bold", size = 14)),
              locations = list(cells_stubhead(), 
                               cells_column_spanners(spanners = c("Neighbourhoods", "Population")),
                               cells_stub()
                               )
              ) %>% 

    tab_style(style = list(cell_text(align = "center")),
                           locations = cells_stub()
                           )
Green Space Deprivation in England
Understanding the scale of the problem
Green Space
Deprivation
Rating
Neighbourhoods Population
Number % Millions of people %
Urgent action needed to improve access to green space
E 1108 16 9.62 18
D 955 14 8.21 15
Total 2,063 30 17.84 33
Action needed to protect access to green space
C 1727 25 13.54 25
B 1360 20 10.77 20
A 1641 24 12.58 23
Total 4,728 70 36.89 67
Source: Friends of the Earth
# As a starting point - select the three indices (GSDI_...) that make up the rating (and the data that these indices are calculated from)
gs_dep_by_rating <- green_space %>% 

  select(green_space_deprivation_rating, 
         gsdi_avg_area, green_space_area_per_capita,
         gsdi_access, pcnt_pop_area_with_go_space_access, 
         gsdi_garden, garden_area_per_capita)

# trying to understand what a typical MSOA in each rating looks like ...
gs_dep_by_rating %>% 
  
  group_by(green_space_deprivation_rating) %>%
  summarise(mean_gs_per_c = mean(green_space_area_per_capita, na.rm = TRUE),
            median_gs_per_c = median(green_space_area_per_capita, na.rm = TRUE),
            mean_perc_pop_with_gs_access = mean(pcnt_pop_area_with_go_space_access, na.rm = TRUE),
            median_perc_pop_with_gs_access = median(pcnt_pop_area_with_go_space_access, na.rm = TRUE),
            mean_gard_area_per_C = mean(garden_area_per_capita, na.rm = TRUE),
            median_gard_area_per_C = median(garden_area_per_capita, na.rm = TRUE)
            )
## `summarise()` ungrouping output (override with `.groups` argument)

Green space area per capita

dealing with outliers Boxplot shows outliers at 1.5*IQR + Q3 - they are part of the natural variability of the population, so it seems appropriate to retain the outliers, but zoom on the graphs because the .

box_plot_mapping = aes(x = green_space_area_per_capita,
                       y = green_space_deprivation_rating)

# may be plotting will help to understand what a typical MSOA in each rating looks like
p <- ggplot(data = gs_dep_by_rating,
            mapping = box_plot_mapping
            )

p + geom_boxplot(aes(group = green_space_deprivation_rating))

# the outliers (in terms of very high values of green space per capita) are making it difficult to get a handle on the distributions
# so identify the approximate value where outliers begin
p <- ggplot(data = gs_dep_by_rating,
            mapping = aes(x = green_space_area_per_capita))

p + geom_boxplot()

# taking the dataset as whole outliers begin at approx. 62.5m2 gs per capita
# this seems like a low value to use as a cut off point for outliers
# so lets look at group A in detail
p <- ggplot(data = gs_dep_by_rating %>% 
              filter(green_space_deprivation_rating == "A"),
            mapping = box_plot_mapping)

p + geom_boxplot() +
  coord_cartesian(xlim = c(0, 1000))

# outliers from rating group A start at approximately 500m2 gs per capita

not sure on whether or not to filter out outliers

p <- ggplot(data = gs_dep_by_rating, #%>% 
            #filter(green_space_area_per_capita < 500),
            mapping = box_plot_mapping
            )

p + geom_jitter(colour = "grey80") +
  geom_boxplot(mapping = aes(group = green_space_deprivation_rating, 
                               fill = green_space_deprivation_rating
                               ),
                 alpha = 0.5
                 ) + 
  coord_cartesian(xlim = c(0, 500)) +
  
  guides(fill = FALSE)

gs_pc_summary <- green_space %>% 
  group_by(green_space_deprivation_rating) %>% 
  summarise(
    min = min(green_space_area_per_capita),
    median = median(green_space_area_per_capita),
    max = max(green_space_area_per_capita),
    iqr = IQR(green_space_area_per_capita),
    mean = mean(green_space_area_per_capita),
    sd = sd(green_space_area_per_capita)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
gs_pc_summary
p <- ggplot(data = gs_dep_by_rating, #%>% 
              #filter(green_space_area_per_capita < 500),
            mapping = aes(x = green_space_area_per_capita,
                          y = green_space_deprivation_rating,
                          fill = green_space_deprivation_rating)
            )

p + geom_density_ridges(alpha = 0.5) + xlim(0,200) + 
  
  
  guides(fill = FALSE)
## Picking joint bandwidth of 3.84
## Warning: Removed 590 rows containing non-finite values (stat_density_ridges).

So, I wondered if the outliers/very long tail are a result of areas with small populations and/very large areas of green space.

p <- ggplot(data = green_space,
            mapping = aes(x = population_imd,
                          colour = green_space_deprivation_rating)
            )

p + geom_point(mapping = aes(y = green_space_area), 
               alpha = 0.5) +
  
  labs(colour = "GS rating") +
  facet_wrap(~green_space_deprivation_rating)

p + geom_point(mapping = aes(y = log(green_space_area)),
               alpha = 0.5) +
  labs(colour = "GS rating")

p <- ggplot(data = green_space, 
            mapping = aes(x = log(green_space_area),
                          )
            )
p + geom_density()
## Warning: Removed 115 rows containing non-finite values (stat_density).

p <- ggplot(data = green_space, 
            mapping = aes(x = population_imd,
                          )
            )
p + geom_density()

So, it looks like the it is the green space area has much more influence on green space area per capita, than population.

p <- ggplot(data = green_space,
            mapping = aes(y = green_space_area_per_capita,
                          colour = green_space_deprivation_rating)
            )

p + geom_point(mapping = aes(x = green_space_area), 
               alpha = 0.5) +
  labs(colour = "GS rating") +
  facet_wrap(~ green_space_deprivation_rating, scales = "free")

p <- p + geom_point(mapping = aes(x = population_imd), 
               alpha = 0.5) +
  labs(colour = "GS rating")

p

p + ylim(0, 500) + facet_wrap(~ green_space_deprivation_rating, scales = "free_y")
## Warning: Removed 288 rows containing missing values (geom_point).

So, lets look at the distribution of green_space_area itself. This is relatively tricky given the wide range of values (as shown in the summary stats). I tried histograms and density plots too, but a box plot seemed the best way to understand the distribution. The first boxplot shows the full distribution and as a result is very difficult to interpret as the large outliers to the right of plot result in the box itself appearing as a single line and hence being very difficult to interpret. In the second plot hte x axis is cropped so it is straight forward to interpret the box component of the plot. However, this comes at the cost of failing to show the very large outliers within the distribution.

The extreme skew of the distribution can be seen in the summary statistics below. The median for green_space_area is 152,418 m2 while the maximum 636,087,671 m2.

gs_area_summary <- function(variable, xlim){
  
  print(green_space %>% select({{variable}}) %>% summary())

  p <- ggplot(data = green_space,
              mapping = aes(x = {{variable}}))
  
  plot(p + geom_boxplot())
  
  p + geom_boxplot() +
    coord_cartesian(xlim = xlim)
  
}

gs_area_summary(green_space_area, c(0, 1e6))
##  green_space_area   
##  Min.   :        0  
##  1st Qu.:    64273  
##  Median :   152418  
##  Mean   :  1725346  
##  3rd Qu.:   371044  
##  Max.   :636087671

# trying different lengths of the x axis to try to understand the distribution
  # p <- p + 
  #   geom_boxplot()
  # 
  # c(10, 100, 1000, 10000, 100000, 1e6, 1e7, 1e8, 1e9) %>% 
  #   map(~ p + coord_cartesian(xlim = c(0, .x)))

A similarly extreme right skewed distribution can seen for green_space_area_per_capita, as shown in the plots and summary stats below. It is worth noting just how atypical many of the larger outliers are. The median for green_space_area_per_capita is less than 20 m2 per capita, while the maximum is approximately 100,000 m2 per capita.

gs_area_summary(green_space_area_per_capita, c(0, 100))
##  green_space_area_per_capita
##  Min.   :     0.00          
##  1st Qu.:     8.19          
##  Median :    19.17          
##  Mean   :   244.72          
##  3rd Qu.:    48.03          
##  Max.   :101465.57

So, I thought it was worth a quick look at the population density across English MSOAs. The first graph shows the kernel density function for the population density of English MSOAs. Key features of the distribution include:

The second plot groups MSOAs by their FoE green space deprivation rating and highlights:

green_space %>% 
  select(population_density) %>% 
  summary()
##  population_density 
##  Min.   :5.557e-06  
##  1st Qu.:7.344e-04  
##  Median :2.623e-03  
##  Mean   :3.458e-03  
##  3rd Qu.:4.655e-03  
##  Max.   :2.737e-02
p <- ggplot(data = green_space,
            mapping = aes(x = population_density
                          )
            )

p + geom_density()

p + geom_density(mapping =aes(fill = green_space_deprivation_rating),
                 alpha = 0.4)

Plotting population density against green space area and green space area per capita produces very associations. Note the log scales on both the x and y axis in both cases.

p <- ggplot(data = green_space,
            mapping = aes(x = log(population_density),
                          colour = green_space_deprivation_rating
                          )
            )

p + geom_point(mapping = aes(y = log(green_space_area)),
            alpha = 0.25) +
  labs(colour = "GS rating")

p + geom_point(mapping = aes(y = log(green_space_area_per_capita)),
               alpha = 0.25) +
  labs(colour = "GS rating") 

Green space access

rating_component_plots <- function(){
  
  # looking at the distribution of rating_component
  plot(p + geom_density())
  
  plot(p + geom_density(mapping = aes(fill = green_space_deprivation_rating),
                   alpha = 0.4))
  
  plot(p + geom_boxplot())
  
  plot(p + geom_histogram())
  
  
  # break down prating_component distribution by green space deprivation rating
  p + geom_jitter(mapping = aes(y = green_space_deprivation_rating), 
                alpha = 0.2, 
                colour = "grey50") +
    geom_boxplot(mapping = aes(y = green_space_deprivation_rating,
                              fill = green_space_deprivation_rating), alpha = 0.5) +
    guides(fill = FALSE)
}

p <- ggplot(data = green_space,
            mapping = aes(x = pcnt_pop_with_go_space_access)
            )

rating_component_plots()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# looking at assocation between pcnt_pop_with_go_space_access and the other 
# two variables that inform the green space deprivation rating
p + geom_point(mapping = aes(y = log(green_space_area_per_capita),
                             colour = green_space_deprivation_rating)
               )

p + geom_point(mapping = aes(y = log(garden_area_per_capita),
                             colour = green_space_deprivation_rating)
                 )

# calculate the percentile of the cutoff value used in FoE classification
percentile <- ecdf(green_space$pcnt_pop_with_go_space_access)
percentile(75)
## [1] 0.9471359
green_space
grouped_summary <- function(df, grouping_var, summary_var){
  df %>% 
    group_by({{grouping_var}}) %>% 
    summarise(min = min({{summary_var}}),
              median = median({{summary_var}}),
              max = max({{summary_var}}),
              iqr = IQR({{summary_var}}),
              mean = mean({{summary_var}}),
              sd = sd({{summary_var}})
    )
}

grouped_summary(df = green_space, 
                grouping_var = green_space_deprivation_rating,
                summary_var = pcnt_pop_with_go_space_access) %>% 
  gt()
## `summarise()` ungrouping output (override with `.groups` argument)
green_space_deprivation_rating min median max iqr mean sd
A 0.6317776 20.96466 96.57368 29.79130 26.36973 19.96222
B 25.0390119 45.67634 100.00000 26.67371 49.46104 18.23865
C 0.0000000 21.92089 90.81132 33.62711 28.72732 22.05134
D 2.0380275 45.00738 99.50842 29.69103 46.82851 19.52741
E 0.0000000 24.18513 100.00000 30.29954 28.92837 21.22944

Garden space

# produce plots for garden_area_per_capita
p <- ggplot(data = green_space %>% 
              filter(garden_area_per_capita < 4000),# remove one outlier around 
                                                    # 5000m2 to make plots readable
            mapping = aes(x = garden_area_per_capita)
            )

rating_component_plots()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# 
# # 
# p <- ggplot(data = green_space,
#             mapping = aes(x = log(garden_area_per_capita))
#             )
# 
# rating_component_plots()

grouped_summary(df = green_space, 
                grouping_var = green_space_deprivation_rating,
                summary_var = garden_area_per_capita) %>% 
  gt()
## `summarise()` ungrouping output (override with `.groups` argument)
green_space_deprivation_rating min median max iqr mean sd
A 85.9232540 195.80436 822.19669 171.74319 224.52812 119.28676
B 3.0706027 91.22088 5647.41855 43.62001 96.63223 156.75402
C 56.9000926 93.86037 598.47099 67.71153 121.12249 72.93403
D 1.8816985 49.97942 85.89009 30.84960 49.79282 20.31293
E 0.2565869 33.54458 85.33891 36.07619 36.97237 22.67188

Urban-rural classification and green space deprivation ratings

Rural-urban classification at LA scale

https://www.gov.uk/government/statistics/local-authority-rural-urban-classification Rural-Urban Classification of Local Authorities Post-2009 Boundaries

readxl::read_xls("../Data/la-class-dataset-post0409.xls", sheet = "England")

Rural-urban classication at MSOA scale

https://geoportal.statistics.gov.uk/datasets/rural-urban-classification-2001-of-msoas-in-england-and-wales

file_loc <- "../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv"

urban_rural_classification <- readr::read_csv(file_loc) %>%
  clean_names() %>% 
  select(msoa01cd, ruc01nm, ruc01cd, morphology_name, morphology_code, context_name, context_code)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   GOR01CD = col_character(),
##   GOR01NM = col_character(),
##   CTY01CD = col_logical(),
##   CTY01NM = col_logical(),
##   LAD01CD = col_character(),
##   LAD01NM = col_character(),
##   MSOA01CD = col_character(),
##   MSOA01NM = col_character(),
##   RUC01NM = col_character(),
##   RUC01CD = col_double(),
##   `Morphology Name` = col_character(),
##   `Morphology Code` = col_double(),
##   `Context Name` = col_character(),
##   `Context Code` = col_double()
## )
## Warning: 6366 parsing failures.
##  row     col           expected       actual                                                                                               file
## 3599 CTY01CD 1/0/T/F/TRUE/FALSE 09           '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3599 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3600 CTY01CD 1/0/T/F/TRUE/FALSE 09           '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3600 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3601 CTY01CD 1/0/T/F/TRUE/FALSE 09           '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## .... ....... .................. ............ ..................................................................................................
## See problems(...) for more details.
urban_rural_classification
# check if any of the parsing problems relate to the columns of interest
columns_of_interest <-  c("MSOA01CD", "RUC01NM", "RUC01CD", "Morphology Name", 
                          "Morphology Code", "Context Name", "Context Code")  

problems(urban_rural_classification) %>% 
  filter(col %in% columns_of_interest)
green_space_urban_rural <- green_space %>% 
  left_join(urban_rural_classification, by = c("msoa_code" = "msoa01cd")) 

green_space_urban_rural
# p <- ggplot(data = green_space_urban_rural,
#             mapping = aes(y = green_space_deprivation_rating))
# 
# p + geom_tile(mapping = aes(x = ruc01cd))


green_space_urban_rural %>% 
  tabyl(green_space_deprivation_rating, morphology_name) %>% 
  adorn_totals("row") %>% 
  adorn_percentages("col") %>% 
  adorn_pct_formatting(rounding = "half up", digits = 0) %>%
  adorn_ns() %>%
  
  gt()
green_space_deprivation_rating Town and Fringe Urban > 10K Village Hamlet & Isolated Dwellings NA_
A 51% (309) 14% (747) 84% (566) 13% (19)
B 7% (41) 24% (1287) 0% (0) 21% (32)
C 41% (248) 25% (1345) 16% (110) 16% (24)
D 1% (6) 17% (923) 0% (0) 17% (26)
E 0% (2) 20% (1056) 0% (0) 33% (50)
Total 100% (606) 100% (5358) 100% (676) 100% (151)
  # gt() %>%
  # grand_summary_rows(columns = vars(`Town and Fringe`, `Urban > 10K`,
  #                                   `Village Hamlet & Isolated Dwellings`, NA_),
  #                    fns = c(total = ~sum(.))
  #                    )

Some thoughts on where I am in understanding the FoE ratings and green_space_area:

gs_pc_summary
gs_pc_summary %>% 
  select(green_space_deprivation_rating, median, iqr)
green_space %>% 
  select(garden_area_per_capita) %>% 
  summary()
##  garden_area_per_capita
##  Min.   :   0.257      
##  1st Qu.:  56.878      
##  Median :  85.890      
##  Mean   : 117.445      
##  3rd Qu.: 138.730      
##  Max.   :5647.419
green_space
p <- ggplot(data = green_space,
            mapping = aes(x = green_space_area_per_capita,
                          y = garden_area_per_capita)
            )

p + geom_point()

An alternative appraoch - clustering

https://community.alteryx.com/t5/Alteryx-Designer-Knowledge-Base/Standardization-in-Cluster-Analysis/ta-p/302296

p <- ggplot(data = green_space_urban_rural)            

p + geom_density(mapping = aes(x = garden_area_per_capita))

p + geom_density(mapping = aes(x = log(green_space_area_per_capita + 1)))

p + geom_density(mapping = aes(x = log(garden_area_per_capita + 1)))

p + geom_density(mapping = aes(x = pcnt_pop_area_with_go_space_access))

My initial efforts in transforming the data - a log transformation and then scaling values to the unit interval (i.e. 0…1) - proved rather unsuccessful. See the summary stats below, with the transformed values remaining tightly grouped together around the median.

library(tidymodels)
## -- Attaching packages -------------------------------------- tidymodels 0.1.2 --
## v broom     0.7.2      v recipes   0.1.15
## v dials     0.0.9      v rsample   0.0.8 
## v infer     0.5.3      v tune      0.1.2 
## v modeldata 0.1.0      v workflows 0.2.1 
## v parsnip   0.1.4      v yardstick 0.0.7
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
library(tidyverse)
library(skimr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
green_space_urban_rural %>%
  distinct(morphology_name, morphology_code) 
# select variables of interest  
cluster_variables <- green_space_urban_rural %>% 
  select(msoa_name,
         green_space_area_per_capita,
         pcnt_pop_area_with_go_space_access,
         garden_area_per_capita)


unit_interval <- function(x){
  (x - min(x)) / (max(x) - min(x))
}

# transform variable to try to remove skew
cluster_variables %>% 
  mutate(log_green_space_area_per_capita = log(green_space_area_per_capita + 1),
         log_garden_area_per_capita = log(garden_area_per_capita + 1), # +1 require because log(0) is not defined
         ui_log_green_space_area_per_capita = unit_interval(log_green_space_area_per_capita),
         ui_log_garden_area_per_capita = unit_interval(log_garden_area_per_capita)) %>%  
  skim() %>% 
  as_tibble()

So, I wondered about focusing on a subset of the data which could be easier to work with. Perhaps given the focus on green space deprivation it makes sense to remove those clearly green space affluent MSOAs (e.g. those with 10,000’s m2 public green space per capita)

green_space_urban_rural %>% 
  ggplot(mapping = aes(x = green_space_area_per_capita,
                       y = morphology_name,
                       fill = morphology_name)) +
  geom_boxplot()

green_space_urban_rural %>% 
  skim(green_space_area_per_capita) %>% 
  as_tibble() %>% 
  clean_names()
green_space_urban_rural %>% 
  ggplot(mapping = aes(x = green_space_area_per_capita,
                       y = morphology_name,
                       fill = morphology_name)) +
  geom_boxplot() +
  coord_cartesian(xlim = c(0, 1000))

green_space_urban_rural %>% 
  ggplot(mapping = aes(x = garden_area_per_capita,
                       y = morphology_name,
                       fill = morphology_name)) +
  geom_boxplot() +
  coord_cartesian(xlim = c(0, 1000))

green_space_urban_rural %>% 
  ggplot(mapping = aes(x = pcnt_pop_area_with_go_space_access,
                       y = morphology_name,
                       fill = morphology_name)) +
  geom_boxplot() 

green_space_urban_rural %>%
  filter(green_space_area_per_capita < 10000) %>% 
  filter(garden_area_per_capita < 1000) %>% 
  ggplot(mapping = aes(x = log(green_space_area_per_capita),
                       y = log(garden_area_per_capita),
                       colour = morphology_name)) +
  geom_point() +
  facet_wrap(~morphology_name)

# check for na's
cluster_variables %>%
  summarise(across(.cols = everything(),
                   .fns = ~sum(is.na(.))
                   )
            )
# remove labeling columns ahead running clustering algorithm
data_points <- cluster_variables %>%
  select(-msoa_name)

kclust <- kmeans(data_points, centers = 3)

# examine outputs
kclust
## K-means clustering with 3 clusters of sizes 4, 31, 6756
## 
## Cluster means:
##   green_space_area_per_capita pcnt_pop_area_with_go_space_access
## 1                  71978.9107                           32.43886
## 2                  19425.8162                           26.68028
## 3                    114.2361                           31.99417
##   garden_area_per_capita
## 1               264.6099
## 2               271.8511
## 3               116.6490
## 
## Clustering vector:
##    [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##   [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##   [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [149] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [186] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [223] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [260] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [297] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [334] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [371] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [408] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [445] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [482] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [519] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [556] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [593] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [630] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [667] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [704] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [741] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [778] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [815] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [852] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [889] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [926] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [963] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1000] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1037] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1074] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1111] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1148] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1185] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1222] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1259] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1296] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1333] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1370] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1407] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1444] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1481] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1518] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1555] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1592] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1629] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1666] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1703] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1740] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1777] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1814] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1851] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1888] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1925] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1962] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1999] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2036] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2073] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2110] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2147] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2184] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2221] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2258] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2295] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2332] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2369] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2406] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2443] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2480] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2517] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2554] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2591] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2628] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2665] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2702] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2739] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2776] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2813] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2850] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2887] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2924] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2961] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2998] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3035] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3072] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3109] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3146] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3183] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3220] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3257] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3294] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3331] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3368] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3405] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3442] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3479] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3516] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3553] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3590] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3627] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3664] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3701] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3738] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3775] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3812] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3849] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 2 3 3 3 3 2 3 3 3
## [3886] 3 3 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 3 3 3 2 2 2 2 3
## [3923] 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3960] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3997] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4034] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4071] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4108] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3 3 3 3 3
## [4145] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4182] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4219] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 2 3 3 3 3 3 3 3
## [4256] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4293] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4330] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4367] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4404] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4441] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4478] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4515] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4552] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4589] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4626] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4663] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4700] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4737] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4774] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4811] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4848] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4885] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4922] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4959] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4996] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5033] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5070] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5107] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5144] 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5181] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5218] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5255] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5292] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5329] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5366] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5403] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5440] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5477] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5514] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5551] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3
## [5588] 3 3 3 3 3 3 3 3 3 3 3 3 1 2 3 3 3 3 2 3 3 3 3 3 3 3 3 2 1 3 2 3 3 3 3 3 3
## [5625] 3 3 3 3 3 3 3 3 3 2 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 2
## [5662] 3 3 3 3 3 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5699] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5736] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5773] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5810] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5847] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5884] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5921] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5958] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5995] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6032] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6069] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6106] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6143] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6180] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6217] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6254] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6291] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6328] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6365] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6402] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6439] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6476] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6513] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6550] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6587] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6624] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6661] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6698] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6735] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6772] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## 
## Within cluster sum of squares by cluster:
## [1] 1566598682 1859441226 1868590580
##  (between_SS / total_SS =  85.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
summary(kclust)
##              Length Class  Mode   
## cluster      6791   -none- numeric
## centers         9   -none- numeric
## totss           1   -none- numeric
## withinss        3   -none- numeric
## tot.withinss    1   -none- numeric
## betweenss       1   -none- numeric
## size            3   -none- numeric
## iter            1   -none- numeric
## ifault          1   -none- numeric
# add cluster identifier to the source data
augment(kclust, data_points)
# summarise per cluster
tidy(kclust)
glance(kclust)
# subset variables to include in clustering
data_points_subset <- data_points %>% 
  select(green_space_area_per_capita,
         garden_area_per_capita)

data_points_subset %>% 
  summary()
##  green_space_area_per_capita garden_area_per_capita
##  Min.   :     0.00           Min.   :   0.257      
##  1st Qu.:     8.19           1st Qu.:  56.878      
##  Median :    19.17           Median :  85.890      
##  Mean   :   244.72           Mean   : 117.445      
##  3rd Qu.:    48.03           3rd Qu.: 138.730      
##  Max.   :101465.57           Max.   :5647.419
data_points_subset
# run k_means clustering for k = 1 ... 9
# and for each cluster create outputs 
kclusts <- tibble(k = 1:9) %>% 
  mutate(k_clust = map(k, ~kmeans(data_points_subset, .x)),
         tidied = map(k_clust, tidy),
         glanced = map(k_clust, glance),
         augmented = map(k_clust, augment, data_points_subset)
         )

kclusts
# separate outputs for use in plotting
clusters <- kclusts %>% 
  unnest(cols = c(tidied))
clusters
assignments <- kclusts %>% 
  unnest(cols = c(augmented))
assignments
clusterings <- kclusts %>% 
  unnest(cols = c(glanced))
clusterings
p1 <- ggplot(data = assignments %>% 
               filter(garden_area_per_capita < 1200), # remove single outlier at approx 5000 m2
             mapping = aes(x = green_space_area_per_capita,
                           y = garden_area_per_capita)
             )

p1 + geom_point(mapping = aes(colour = .cluster)) +
  facet_wrap(~ k)

gs_bar <- function(){
  
  list(
      geom_col(),
      
      scale_fill_manual(values = colour_scale),
      
      geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5),
      
      guides(fill = FALSE),
      
      theme(axis.text.x = element_text(face = "bold"),
            axis.text.y = element_text(face = "bold", size = 15),
            plot.title = element_text(face = "bold", size = 15)
            ),
      
      coord_flip()
  )
  
}

bar_labels <- c("Large or very large gardens and large or \nvery large amounts of public green space",
                           "",
                           "",
                           "",
                           "Very small gardens and very small\namounts of public green space")

x_lab <-  "Green space deprivation rating\n\n"
caption_lab <-  "Source: Friends of the Earth, England's Green Space Gap"
p <- ggplot(data = msoa_and_pop_by_rating,
            mapping = aes(x = green_space_deprivation_rating, 
                          y = n_msoa,
                          fill = green_space_deprivation_rating)
            )

p + gs_bar() +
  
  labs(title = "Numbers of neighbourhoods in England by green space\ndeprivation rating\n",
       x = x_lab,
       y = "\nNumber of Neighbourhoods\n",
       caption = caption_lab) 

Below I plot the proportions of the 6791 MSOAs analyzed given each green space deprivation rating.

p <- ggplot(data = msoa_and_pop_by_rating,
            mapping = aes(x = green_space_deprivation_rating, 
                          y = prop_msoa,
                          fill = green_space_deprivation_rating)
            )

p + gs_bar() +

  labs(title = "Proportion of neighbourhoods in England by green space\ndeprivation rating\n",
       x = x_lab,
       y = "Proportion of neighbourhoods",
       caption = caption_lab)
  

msoa_by_rating
p <- ggplot(data = msoa_and_pop_by_rating,
            mapping = aes(x = green_space_deprivation_rating,
                          y = population,
                          fill = green_space_deprivation_rating)
            )

p + gs_bar() +
  
  labs(title = "The population living within neighbourhoods with each\ngreen space deprivation rating\n",
       y = "\nPopulation",
       x = x_lab)

The distribution of green space deprivation ratings across the regions

The two plots below, respectively show for each region the numbers or proportion of MSOA with each green space deprivation rating. Key insights from the two plots include:

  • Over 45% of London’s MSOAs have been given the lowest rating (E). A further approximately 30% of London’s MSOAs are rated are rated D;
  • In the other regions (excluding London), the proportion of MSOAs with the lowest rating (E) in each is in the range of approximately 5-15%. The demonstrates that green space deprivation is a nationwide issue, that is perhaps unsurprisingly prominent in London (the largest city)
  • The North-East and the North-West have very similar distributions of MSOA green space deprivation ratings. With the a fairly symetrical and bell shaped distribution, centred on the C rating. Perhap reflecting a mix
  • The East of England, the South East and the South West have similar distribution, where the most commonly occurring rating is A.
  • Overall, there are considerable inequities within each region in terms of access to green space. All regions include a mix of green space deprived and affluent neighbourhoods.
msoa_by_region_and_rating <- green_space %>% 
  
  # count by region and rating
  group_by(region_name, green_space_deprivation_rating) %>% 
  summarise(n_msoa = n()) %>% 
  filter(!is.na(region_name)) %>% 
  ungroup() %>% 

  # calculate rating proportions by region
  group_by(region_name) %>% 
  mutate(prop = n_msoa / sum(n_msoa)) %>% 
  ungroup()

msoa_by_region_and_rating

plot_msoas_by_region <-  function(y_var){
  
  p <- ggplot(data = msoa_by_region_and_rating,
            mapping = aes(x = green_space_deprivation_rating, y = .data[[y_var]])
            )

  p + geom_bar(mapping = aes(fill = green_space_deprivation_rating),
             stat = "identity",
             colour = "grey80") +
  
  scale_fill_brewer(type = "div",
                    palette = "RdYlGn",
                    direction = -1) +
  
  guides(fill = FALSE) +
  
  coord_flip() +
  
  facet_wrap(~ region_name, ncol = 3)
}

plot_msoas_by_region("n_msoa")
plot_msoas_by_region("prop")

Below I plot again plot the proportion of MSOAs within each region receiving each green space deprivation rating. This time faceting the plot by green space deprivation rating rather than region. This makes it easier to compare across regions at a given rating.

  • So we can clearly see the South East and Southwest have higher proportions of A rated MSOAs than say the North East and North West.
  • While outside of London, within each region there are similar proportion of neighbourhoods receiving the two highest green space deprivation ratings (D and E).

It would be easier to read if I produced separate plots for each green space deprivation rating, as then the regions could be put in rank order.

prop_a <- msoa_by_region_and_rating %>%
  filter(green_space_deprivation_rating == "A") %>% 
  mutate(prop_a = prop) %>% 
  select(region_name, prop_a)

msoa_by_region_and_rating <- msoa_by_region_and_rating %>% 
  left_join(prop_a)
  
msoa_by_region_and_rating  

p <- ggplot(data = msoa_by_region_and_rating,
            mapping = aes(x = reorder(region_name, prop_a), y = prop, fill = region_name)
            )

p + geom_bar(stat = "identity") +
  facet_wrap(~ green_space_deprivation_rating) +
  
  guides(fill = FALSE) +
  
  coord_flip()

Next I explored an alternative appraoch to considering the distribution of msoas by rating and region. I plotted the proportion of MSOAs that received a specific rating by region. In order to address the ordering issue above, I produced one plot for each green space rating. This meant addressing the challenge of how to ensure the colour associated with a given region was applied consistently across the five plots. Here is where I found the an approach to doing this, using scale_…_manual.

How to map a colour to a value of a categorical variable …

This approach help me identify some additional insights:

  • For the highest green space deprivation ratings (D and E), the ranking of regions is the same: (1) London, (2) North West, (3) South East, (4) Yorkshire and the Humber and (5) West Midlands.
  • Over half of MSOAs given the highest green space deprivation rating are accounted for by just two regions: (1) London and (2) the North West.
# The idea is to produce one bar chart for each green space rating, and across these plots use the same colour for a given region. 
# choose a palette to work with
region_pal <- brewer.pal(9, "Paired")

# map the region names to a specific colour value (in a dataframe for ease)
region_colours_df <- msoa_by_region_and_rating %>% 
  select(region_name) %>% 
  distinct(region_name) %>% 
  add_column(colour = region_pal)

# convert the region-colour mapping from a dataframe to a vector
# because a vector is required by scale_fill_manual
region_colours_vector <- region_colours_df$colour
names(region_colours_vector) <- region_colours_df$region_name
region_colours_vector


plot_gsdr_prop_by_region <- function(rating = "A"){
  
    msoa_by_region_and_rating %>%
      
    # focus on the rating of interest
    filter(green_space_deprivation_rating == rating) %>% 
    mutate(proportion = n_msoa / sum(n_msoa)) %>%
    
    # create the bar chart
    ggplot(mapping = aes(x = reorder(region_name, proportion), 
                         y = proportion,
                         fill = region_name
                         )
           ) +
      
    geom_col() +
    
    # apply region-colour mapping  
    scale_fill_manual(values = region_colours_vector) +
    
    guides(fill = FALSE) +
      
    labs(title = str_c("MSOAs receiving green space deprivation rating: ", rating),
         x = NULL,
         y = str_c("Proportion of the MSOAs that recieved a rating of ", rating)
         ) +
    
    coord_flip()
}


# produce one plot for each of the five green space deprivation ratings
gsdr_prop_by_region_plots <- c("A", "B", "C", "D", "E") %>% 
  map(~ plot_gsdr_prop_by_region(.x))

gsdr_prop_by_region_plots
d_plot <- gsdr_prop_by_region_plots[[4]] + 
  ylim(0, 0.45) # ensure the plot axis scales match
  
e_plot <- gsdr_prop_by_region_plots[[5]] + 
  ylim(0, 0.45) # ensure the plot axis scales match

plot_grid(d_plot, e_plot, ncol = 2)

Could do ridgeline plots for each region - for prop rated A and proportion rated E

The geographic distribution of green space deprivation ratings

https://datacarpentry.org/r-raster-vector-geospatial/06-vector-open-shapefile-in-r/

A quick visual inspection of the MSOAs colored by their green space deprivation rating, shows a similiar patter across the regions (with the exception of London). With the the D and E ratings (oranges and reds) occurring in smaller (presumably more densely populated MSOAs) which make up urban areas. While the larger, more rural MSOAs tend to be less green space deprived, and have A or B ratings. Given the whole region of London would probably be considered a continuous urban space, it is unsurprising to observe many green space deprived MSOAs across the region/plot, with relatively few less green space deprived areas present.

green_space_region_plot <- function(region){

  p <-  ggplot(data = msoa_sf_gs %>% 
                 filter(region_name == region),
               mapping = aes(fill = green_space_deprivation_rating)
               )
  
  p + geom_sf(colour = "grey75") +
    
    scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) +
    
    labs(title = str_c("...", region))
}

region_names <- green_space %>%
  select(region_name) %>% 
  distinct(region_name)

region_names %>%
  filter(!is.na(region_name)) %>% 
  pmap(~ green_space_region_plot(region = .x))

Ethnicity and green space deprivation

green_space <- green_space %>% 
  mutate(prop_BAME_pop = BAME_Pop / Total_Pop_From_Ethnicity_Data) %>%
  relocate(prop_BAME_pop, .after = BAME_Pop)

p <- ggplot(data = green_space,
            mapping = aes(x = green_space_deprivation_rating,
                          y = prop_BAME_pop)
            )

p + geom_jitter(alpha = 0.5, colour = "grey75") +
  geom_boxplot(mapping = aes(fill = green_space_deprivation_rating), 
               alpha = 0.5, 
               size = 1.25,
               colour = "grey25") +
  
  scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) +
  
  guides(fill = FALSE) +
  
  coord_flip()
p <- ggplot(data = green_space,
            mapping = aes(y = green_space_deprivation_rating,
                          x = prop_BAME_pop,
                          fill = green_space_deprivation_rating)
            )

p + geom_density_ridges(alpha = 0.6, scale = 1.5) +
  scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) 

Wealth and green space deprivation

Health and green space deprivation

Covid and green space deprivation

Archived code

p <- ggplot(data = msoa_by_rating,
            mapping = aes(x = green_space_deprivation_rating, y = n_msoa)
            )

p + geom_segment(mapping = aes(x = green_space_deprivation_rating,
                               xend = green_space_deprivation_rating,
                               y = 750,
                               yend = n_msoa),
                 colour = "grey50",
                 size = 1.5
                 ) +
  
  geom_point(mapping = aes(fill = green_space_deprivation_rating),
             size = 7.5,
             shape = 21, 
             colour = "grey50", 
             stroke = 1.25
             ) +
  
  scale_fill_brewer(type = "div",
                    palette = "RdYlGn",
                    direction = -1) +
  
  
  labs(title = title_lab,
       subtitle = subtitle_lab,
       x = x_lab,
       y = y_lab,
       caption = caption_lab) +
  
  guides(fill = FALSE) +
  
  # facet_wrap(~ green_space_deprivation_rating) +
  
  coord_flip()
Russo, Alessio, and Giuseppe Cirella. 2018. “Modern Compact Cities: How Much Greenery Do We Need?” International Journal of Environmental Research and Public Health 15 (10): 2180. https://doi.org/10.3390/ijerph15102180.